<?php
require_once(dirname(dirname(__FILE__)).'/include/common.php');

set_time_limit(0);
//header("Content-Type: application/xml; charset=gbk");
/*
 * Workflow notes carried over from the original author:
 *   1. Store every list URL that has to be collected as an array in a cache file.
 *   2. When a list-URL parameter is passed in, collect that page directly.
 *   3. Redirect once collection has finished.
 */
$snoopy = new Snoopy(); // NOTE(review): not referenced by the code visible in this file - confirm before removing
$cachefiles = new Cachefile();
ini_set("display_errors", "On");
error_reporting(E_ALL | E_STRICT);
/*
Optional Snoopy settings, currently disabled:
//$snoopy->proxy_host = "72.247.48.10";
//$snoopy->proxy_port = "80";
$snoopy->rawheaders["Pragma"] = "no-cache";
$snoopy->expandlinks = true; // expand relative links to absolute ones
$snoopy->agent = "(Mozilla/5.0 (Windows NT 5.1; rv:19.0) Gecko/20100101 Firefox/19.0)"; // browser user agent
$snoopy->referer = "http://www.microsnot.com/";
$snoopy->maxredirs = 20; // maximum number of redirects
$snoopy->rawheaders["X_FORWARDED_FOR"] = "127.0.0.101"; // forwarded-for IP
$snoopy->cookies["PHPSESSID"] = 'hp2rzi2msgio0j55iumn5eb2'; // session id
*/
$android=array();

// Request parameters. Every one of them is optional, so read them defensively:
// errors are displayed (see above) and a missing key used to print a notice.
$pageid = (isset($_GET['pageid']) && is_scalar($_GET['pageid'])) ? $_GET['pageid'] : null;
$page = isset($_GET['page']) ? (int)$_GET['page'] : 0;
$id = isset($_GET['id']) ? (int)$_GET['id'] : 0;

// Cached list of page URLs; $pageid selects one entry when it is present.
$file_num = 1;
$urldata = $cachefiles->readCachefile($file_num.'_pagelist.php');

$nums = is_array($urldata) ? count($urldata) : 0;
$fetchurl = ($pageid !== null && is_array($urldata) && isset($urldata[$pageid])) ? trim($urldata[$pageid]) : '';

if(empty($page)) $page=1;

// Category lookup table read from the cache (indexed by md5 of the category name in the loop below).
$allurls = $cachefiles->readCachefile('jiancai365_cn_arr.php');

// 1. Work out where to start: an explicit ?id= wins, otherwise continue after the
//    highest ID recorded by the previous run. The stored value is normalised to an
//    integer so stray whitespace or garbage in the file cannot leak into the arithmetic.
$lastIdFile = CACHE_PAGE_ROOT."/jiancai365_cn_old.txt";
$fromID = is_file($lastIdFile) ? (int)trim((string)file_get_contents($lastIdFile)) : 0;
if($id){
	$fromID = $id;
}else{
	//$fromID = 100000;
	if($fromID<=0) {
		echo '没有最大ID号，请指定一个进行采集';
		exit;
	}

	$fromID = $fromID+1;
}

// Each run covers the start ID plus the 150 IDs that follow it.
$endID = $fromID+150;

//$dateDir = date('y/md/',time()-3600*24*2);
$dateDir = date('y/md/',time()); // NOTE(review): not read by the code visible here; presumably used by an image helper defined elsewhere - confirm
// Alternative input: a fixed list of IDs, e.g. $ids = array(856671,1,63000);
// Walk the ID window on www.jiancai365.cn and import every product page that responds,
// maps to a known local category and is not stored yet.
// (To process a hand-picked list instead, iterate with: foreach ($ids as $i) { ... })
for($i=$fromID;$i<=$endID;$i++){
	// Start every item from a clean slate. Without this, a page that lacks a title,
	// category or content block silently reused the values scraped for the previous
	// product and could be stored with another product's data.
	$android = array('oldurl' => '', 'title' => '', 'navName' => '', 'content' => '', 'thumb' => '', 'created' => 0);
	$title = '';
	$pageonehtml = '';
	$twocontenthtml = '';
	$allowedTags = '<p><img><table><span><h1><h2><h3><h4><h5><h6><tr><td><br><br/>';

	// Progress marker: the ID currently being attempted.
	file_put_contents(CACHE_PAGE_ROOT."/jiancai365_cn_old_i.txt",$i);

	//$fetchurl = 'http://news.jc001.cn/detail/1.html';
	echo $fetchurl = 'http://www.jiancai365.cn/cp_'.$i.'.htm';
	$httpStatus = GetHttpStatusCode($fetchurl);

	// Loose comparison on purpose: GetHttpStatusCode() hands back curl's numeric code.
	if(!in_array($httpStatus,array('200','206','301','302'))){
		// Nothing usable at this ID: record it as handled and move on.
		file_put_contents(CACHE_PAGE_ROOT."/jiancai365_cn_old.txt",$i);
		continue;
	}

	// NOTE(review): $oldid is not read in the visible code; kept in case an external helper reads it.
	$oldid = $oldurl = $android['oldurl'] = $fetchurl;

	$pagecontent = file_get_contents($fetchurl);
	if($pagecontent === false){
		// The status probe succeeded but the download failed; there is nothing to parse.
		continue;
	}

	// Product title: first <h1>, converted from the source encoding (GBK) to UTF-8.
	if(preg_match_all("/<h1>([\s\S]+?)<\/h1>/i",$pagecontent,$mypics)) {
		$android['title'] = (string)iconv("gbk","UTF-8",trim($mypics[1][0]));
		$title = addslashes($android['title']);
	}

	// Source category: the text of the fourth link inside the "nav3_l" breadcrumb block.
	if(preg_match_all("/<div class=\"nav3_l\">([\s\S]+?)<\/div>/i",$pagecontent,$mypics)) {
		if(preg_match_all("/<a([\s\S]+?)>([\s\S]+?)<\/a>/i",trim($mypics[0][0]),$navcontent)) {
			if(isset($navcontent[2][3])){
				$android['navName'] = (string)iconv("gbk","UTF-8",$navcontent[2][3]);
			}
		}
	}

	// Map the source category onto a local one. The cached table is indexed by
	// md5(category name) and each entry carries 'catid' and 'catdir'.
	$navKey = md5(trim($android['navName']));
	$urlArr = (is_array($allurls) && isset($allurls[$navKey])) ? $allurls[$navKey] : array();
	$cid = isset($urlArr['catid']) ? $urlArr['catid'] : 0;
	$catdir = isset($urlArr['catdir']) ? $urlArr['catdir'] : '';

	// Skip anything already imported (same source URL or same title) and anything
	// whose category is unknown locally.
	$sql="SELECT id FROM `v9_product` WHERE  oldurl='".addslashes($oldurl)."' OR title = '$title'";
	$sofresult = $WEBDB->fetch_first($sql);
	if(!empty($sofresult['id']) || !$cid){
		continue;
	}

	// First content block: the <div ... class="a1"> section, reduced to a whitelist of tags.
	if (preg_match_all("/<div([\s\S]+?)class=\"a1\">([\s\S]+?)<\/div>/i",$pagecontent,$blockMatch)){
		$pageonehtml = strip_tags(trim(replaceHtmlAndJs($blockMatch[2][0])),$allowedTags);
	}

	// Rewrite image sources in that block to absolute URLs.
	$pageonehtml = preg_replace_callback("/<([IMG|img]+)([\s\S]+?)src=([\"|']?)([^\"'>]+\.(gif|jpg|jpeg|bmp|png))([\s\S]*?)([\/])?>/i", 'jiacai365cn',$pageonehtml);

	// Second content block: the <dl class="txt_24"> section.
	if (preg_match_all("/<dl class=\"txt_24\">([\s\S]+?)<\/dl>/i",$pagecontent,$blockMatch)){
		$twocontenthtml = strip_tags(trim(replaceHtmlAndJs($blockMatch[1][0])),$allowedTags);
	}

	$android['content'] = $pageonehtml.$twocontenthtml;

	// Image post-processing by a helper that is defined outside this file.
	$android['content'] = (string)preg_replace_callback("/<([IMG|img]+)([\s\S]+?)src=(['\|\"]+)([\s\S]+?)(['\|\"]+)([\s\S]*?)([\/])?>/i", 'jiacaidoimages',$android['content']);

	// The first image found in the content becomes the thumbnail.
	if(preg_match_all("/(src)=([\"|']?)([^ \"'>]+\.(gif|jpg|jpeg|bmp|png))\\2/i", $android['content'], $matches)) {
		$android['thumb'] = $matches[3][0];
	}
	$android['content'] = (string)iconv("gbk","UTF-8",$android['content']);

	// Back-date the record by a random 60-200 days.
	$oldtime = rand(60,200);
	$android['created'] = time()-3600*24*$oldtime;

	// Values for the SQL statements below.
	// NOTE(review): escaping relies on addslashes(); switch to the $WEBDB escaping or
	// prepared-statement API if it offers one.
	$title = addslashes($android['title']);
	$thumb = addslashes($android['thumb']);
	$keywords = addslashes($android['title']);
	$updated = addslashes($android['created']);
	$created = addslashes($android['created']);
	$content = addslashes($android['content']);
	$description = addslashes(cutword($android['content'],120));
	$cid = addslashes($cid);
	$userid = 1;
	$oldurl = addslashes($android['oldurl']);

	// Store only items that have real content and a title longer than 5 bytes.
	if(($title!='')&&($content!='') && (strlen($title)>5)){
		$sql = "INSERT INTO `v9_product`
			(`catid`, `typeid`, `title`, `style`, `thumb`, `keywords`, `description`, `posids`, `url`, `listorder`, `status`, `sysadd`, `islink`, `username`, `inputtime`, `updatetime`, `uid`, `oldurl`)
			VALUES
			('$cid', '0', '$title','', '$thumb', '$keywords', '$description', '0', '',  '', '99', '1', '0', '都市家', '$created', '$updated', '$userid','$oldurl');";

		$re = $WEBDB->query($sql);
		$PID = (int)$WEBDB->insert_id();

		if($PID){
			// Hit counters, seeded with a random view count.
			$hitsid = 'c-12-'.$PID;
			$views = rand(100,10000);

			$sql ="INSERT INTO `v9_hits` (`hitsid`, `catid`, `views`, `yesterdayviews`, `dayviews`, `weekviews`, `monthviews`, `updatetime`) VALUES ('$hitsid', '$cid', '$views', '$views', '$views', '$views', '$views', '$created');";
			$re = $WEBDB->query($sql);

			// Search index entry.
			$sql ="INSERT INTO `v9_search` (`searchid`, `typeid`, `id`, `adddate`, `data`, `siteid`) VALUES (NULL, '1', '$PID', '$created', '$title', '1');";
			$re = $WEBDB->query($sql);

			// Public URL of the new record, built from the category directory and the back-dated timestamp.
			echo $url = 'http://product.dushijia.com/'.$catdir.'/'.date('Y',$created).'/'.date('md',$created).'/'.$PID.'.shtml';

			$sql = "UPDATE `v9_product` SET `url` = '".addslashes($url)."' WHERE `v9_product`.`id` =$PID";
			$re = $WEBDB->query($sql);

			// Article body.
			$sql="INSERT INTO `v9_product_data` (`id`, `content`, `readpoint`, `groupids_view`, `paginationtype`, `maxcharperpage`, `template`, `paytype`, `relation`, `voteid`, `allow_comment`, `copyfrom`) VALUES ('$PID', '$content', '2', '', '0', '10000', '0', '', '0', '', '1', '|0');";
			$WEBDB->query($sql);
			// The former random promotion into `v9_position_data` is disabled.
		}

		// Record this ID as the highest one handled so far.
		file_put_contents(CACHE_PAGE_ROOT."/jiancai365_cn_old.txt",$i);
	}

	// Human-readable summary of the item just handled.
	$html='已采集原地址为：'.$android['oldurl'].'<br/>';
	$html.='软件标题为：'.$android['title'].'<br/>';

	unset($htmls); // kept from the original flow: the URL list used after the loop starts out empty

	set_time_limit(0);
}

// Refresh the site home page by calling the CMS "create_html / public_index" action
// (manual equivalent: http://dushijia.com/index.php?m=content&c=create_html&a=public_index&pc_hash=1otYRx).
// The URL list is built from scratch so that nothing left over from earlier in the run is requested.
$htmls = array('http://www.dushijia.com/index.php?m=content&c=create_html&a=public_index&kylingood=kylingood');
$data = Curl_http($htmls,'10');

// The run ends here. The older "advance pageid and meta-refresh to news.php" step sat
// after this exit and could never execute, so it has been removed.
// (News index refresh, not wired in: http://dushijia.com/index.php?m=content&c=create_html&a=category)
exit;




 /**
  * Request several URLs in parallel through curl_multi.
  *
  * @param array $array   URLs to request; the keys are preserved in every returned list.
  * @param int   $timeout Per-request timeout in seconds (CURLOPT_TIMEOUT).
  * @return array 'diff_time' => elapsed seconds (float),
  *               'return'    => response bodies by key,
  *               'header'    => curl_getinfo() arrays by key,
  *               'error'     => curl_error() strings by key ('' when the transfer succeeded).
  */
 function Curl_http($array,$timeout){
	$res = array();
	$header = array();
	$errors = array();
	$conn = array();
	$mh = curl_multi_init();
	$startime = microtime(true);

	foreach($array as $k=>$url){
		$conn[$k] = curl_init($url);

		curl_setopt($conn[$k], CURLOPT_TIMEOUT, (int)$timeout);
		curl_setopt($conn[$k], CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:18.0) Gecko/20100101 Firefox/18.0');
		curl_setopt($conn[$k], CURLOPT_MAXREDIRS, 7);
		curl_setopt($conn[$k], CURLOPT_HEADER, 0); // body only
		curl_setopt($conn[$k], CURLOPT_FOLLOWLOCATION, 1);
		curl_setopt($conn[$k], CURLOPT_RETURNTRANSFER, 1);
		curl_multi_add_handle($mh, $conn[$k]);
	}

	// Start the transfers.
	do {
		$mrc = curl_multi_exec($mh, $active);
	} while ($mrc == CURLM_CALL_MULTI_PERFORM);

	// Drive them until none is active any more.
	while ($active && $mrc == CURLM_OK) {
		if (curl_multi_select($mh) == -1) {
			// select() may fail immediately; pause briefly instead of spinning, then
			// still call curl_multi_exec() so that $active is updated and the loop can end.
			usleep(1000);
		}
		do {
			$mrc = curl_multi_exec($mh, $active);
		} while ($mrc == CURLM_CALL_MULTI_PERFORM);
	}

	foreach ($conn as $k => $handle) {
		$errors[$k] = curl_error($handle);
		$res[$k] = curl_multi_getcontent($handle);
		$header[$k] = curl_getinfo($handle);
		// Detach the handle from the multi handle before closing it.
		curl_multi_remove_handle($mh, $handle);
		curl_close($handle);
	}

	curl_multi_close($mh);
	$diff_time = microtime(true) - $startime;

	return array('diff_time'=>$diff_time,
				 'return'=>$res,
				 'header'=>$header,
				 'error'=>$errors
				);
 }
 // Current Unix time in seconds, including the microsecond fraction, as a float.
 function getmicrotime() {
	// microtime() yields "<fraction> <seconds>"; add the two parts numerically.
	$parts = explode(" ", microtime());
	$fraction = (float)$parts[0];
	$seconds = (float)$parts[1];
	return ($fraction + $seconds);
}

// Download each URL and return the concatenated contents of the id="mainCnt" block
// of every page, with scripts removed and the markup reduced to a whitelist of tags.
// Pages without such a block contribute nothing.
function pagefetch($urls){

	$fragments = array();

	foreach($urls as $pageUrl){
		$markup = file_get_contents($pageUrl);

		if (!preg_match_all("/<div([\s\S]+?)id=\"mainCnt\">([\s\S]+?)<\/div>/i",$markup,$found)){
			continue;
		}

		// Only the first matching block of a page is used.
		$cleaned = trim(replaceHtmlAndJs($found[2][0]));
		$fragments[] = strip_tags($cleaned,'<p><img><table><span><h1><h2><h3><h4><h5><h6><tr><td><br><br/>');
	}

	return implode('', $fragments);

}


/**
 * Trim a fragment of markup and remove every <script>...</script> element from it.
 * Only scripts are removed; other tags and HTML entities are left untouched.
 *
 * @param string $document Markup fragment.
 * @return string|null The trimmed fragment without script elements (whatever preg_replace() returns).
 */
function replaceHtmlAndJs($document)
{
	$markup = trim($document);

	// Nothing left after trimming: hand the empty value straight back.
	if (strlen($markup) < 1)
	{
		return $markup;
	}

	// Case-insensitive, and "." also matches line breaks so multi-line scripts are caught.
	$scriptPattern = "'<script[^>]*?>.*?</script>'si";

	return @preg_replace($scriptPattern, "", $markup);
}

		/**
		 * Strip HTML from a piece of text (typically MS Word flavoured markup).
		 *
		 * Typographic entities are turned into plain quotes/dashes, all entities are decoded,
		 * C-style comments and HTML comments are removed, tags that are not listed in
		 * $allowtags are stripped and presentational attributes (style, class, id, lang,
		 * width, height, border, size) are deleted from what remains.
		 *
		 * @param string $content   Text to clean; the patterns carrying the "u" modifier expect UTF-8.
		 * @param string $allowtags Tags to keep, in strip_tags() notation (e.g. '<p><br>'); empty keeps none.
		 * @return string|null Cleaned text (null if a PCRE call fails, e.g. on invalid UTF-8).
		 */
		function ClearHtml($content,$allowtags='') {
			// Kept for callers that rely on the global mb regex encoding being switched to UTF-8.
			if (function_exists('mb_regex_encoding')) {
				mb_regex_encoding('UTF-8');
			}

			// Replace MS special characters first.
			$search = array('/&lsquo;/u', '/&rsquo;/u', '/&ldquo;/u', '/&rdquo;/u', '/&mdash;/u');
			$replace = array('\'', '\'', '"', '"', '-');
			$content = preg_replace($search, $replace, $content);

			// Some MS headers encode only part of their entities; decode all of them.
			$content = html_entity_decode($content, ENT_QUOTES, 'UTF-8');

			// Strip C-style comments: embedded in HTML comments they keep strip_tags() from
			// removing those. The previous code passed this delimiter-style pattern to
			// mb_eregi_replace(), which takes patterns WITHOUT delimiters, so it never matched.
			if (strpos($content, '/*') !== false) {
				$content = preg_replace('#/\*.*?\*/#s', '', $content);
			}

			// '<1' becomes '< 1' so that arithmetic is not mistaken for a tag by strip_tags().
			$content = preg_replace('/<([0-9]+)/', '< $1', $content);

			$content = strip_tags($content, $allowtags);

			// Trim runs of whitespace at both ends and collapse the remaining runs to one space.
			$content = preg_replace(array('/^\s\s+/', '/\s\s+$/', '/\s\s+/u'), array('', '', ' '), $content);

			// Simplify emphasis tags and drop their inline attributes.
			$search = array('#<(strong|b)[^>]*>(.*?)</(strong|b)>#isu', '#<(em|i)[^>]*>(.*?)</(em|i)>#isu', '#<u[^>]*>(.*?)</u>#isu');
			$replace = array('<b>$2</b>', '<i>$2</i>', '<u>$1</u>');
			$content = preg_replace($search, $replace, $content);

			// Newer MS Word exports ("if gte mso 9" conditionals) leave HTML comments that
			// strip_tags() does not remove; cut everything from the first "<!--" to the last "-->".
			if (preg_match('/<!--/u', $content)) {
				$content = preg_replace('/\<!--(.)*--\>/isu', '', $content);
			}

			// Attribute clean-up. It is run twice, as before, because removing one
			// attribute or tag can expose another match.
			$attributePatterns = array(
				"/style=.+?['|\"]/i",
				"/class=.+?['|\"]/i",
				"/id=.+?['|\"]/i",
				"/lang=.+?['|\"]/i",
				"/width=.+?['|\"]/i",
				"/height=.+?['|\"]/i",
				"/border=.+?['|\"]/i",
				"/size=.+?['|\"]/i",
			);
			for ($pass = 0; $pass < 2; $pass++) {
				$content = preg_replace("/<!--[^>]*-->/i", "", $content);
				$content = preg_replace($attributePatterns, '', $content);
				$content = strip_tags($content, $allowtags);
			}

			return $content;
		}


/**
 * Neutralise common XSS vectors in a string.
 *
 * Steps: drop control characters, decode numeric entities of plain ASCII characters
 * (so "&#x6A;avascript" cannot hide a keyword), then break every dangerous tag name,
 * scheme and on* handler by inserting "<x>" after its second character.
 *
 * @param string $val Untrusted input.
 * @return string|null Filtered text (null if a PCRE call fails).
 */
function RemoveXSS($val) {
   // Remove control characters; TAB (09), LF (0a) and CR (0d) are kept.
   // This prevents re-spacing tricks such as <java\0script>.
   // (The class used to contain literal commas, which removed every comma from the input.)
   $val = preg_replace('/([\x00-\x08\x0b-\x0c\x0e-\x19])/', '', $val);

   // Plain characters never need to be entity-encoded; decoding them defeats
   // vectors like <IMG SRC=&#x6A;avascript:alert('XSS')>.
   $search = 'abcdefghijklmnopqrstuvwxyz';
   $search .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
   $search .= '1234567890!@#$%^&*()';
   $search .= '~`";:?+/={}[]-_|\'\\';
   $searchLength = strlen($search);
   for ($i = 0; $i < $searchLength; $i++) {
      // Hex form, with up to eight padding zeros and an optional ";".
      $val = preg_replace('/(&#[xX]0{0,8}'.dechex(ord($search[$i])).';?)/i', $search[$i], $val);
      // Decimal form, same rules.
      $val = preg_replace('/(&#0{0,8}'.ord($search[$i]).';?)/', $search[$i], $val);
   }

   $ra1 = array('javascript', 'vbscript', 'expression', 'applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame', 'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base');
   $ra2 = array('onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint', 'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncellchange', 'onchange', 'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncut', 'ondataavailable', 'ondatasetchanged', 'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onerrorupdate', 'onfilterchange', 'onfinish', 'onfocus', 'onfocusin', 'onfocusout', 'onhelp', 'onkeydown', 'onkeypress', 'onkeyup', 'onlayoutcomplete', 'onload', 'onlosecapture', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onmove', 'onmoveend', 'onmovestart', 'onpaste', 'onpropertychange', 'onreadystatechange', 'onreset', 'onresize', 'onresizeend', 'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll', 'onselect', 'onselectionchange', 'onselectstart', 'onstart', 'onstop', 'onsubmit', 'onunload');
   $ra = array_merge($ra1, $ra2);

   // Entity-encoded TAB/LF/CR (plus hex b, accepted as before) may sit between any two
   // letters of a keyword. The decimal branch used to be the character class
   // [9|10|13], which cannot match "10" or "13"; the hex branch lacked "d" (CR).
   $gap = '(?:&#[xX]0{0,8}[9abd];|&#0{0,8}(?:9|10|13);)*';

   // One pattern per keyword, built once instead of on every round.
   $patterns = array();
   $replacements = array();
   foreach ($ra as $word) {
      $patterns[] = '/'.implode($gap, str_split($word)).'/i';
      $replacements[] = substr($word, 0, 2).'<x>'.substr($word, 2); // add in <> to nerf the tag
   }

   // Keep replacing for as long as a full round still changes something.
   do {
      $val_before = $val;
      $val = preg_replace($patterns, $replacements, $val);
   } while ($val !== null && $val_before !== $val);

   return $val;
}

/**
 * Ask a URL for its headers only and report the HTTP status code of the response.
 *
 * @param string $url Address to probe.
 * @return int Status code as reported by curl_getinfo(CURLINFO_HTTP_CODE).
 */
function GetHttpStatusCode($url){
   $handle = curl_init();
   curl_setopt_array($handle, array(
      CURLOPT_URL            => $url, // address to probe
      CURLOPT_HEADER         => 1,    // include the response headers
      CURLOPT_NOBODY         => 1,    // do not download the body
      CURLOPT_RETURNTRANSFER => 1,    // return the data instead of printing it
      CURLOPT_TIMEOUT        => 30,   // seconds
   ));
   curl_exec($handle);
   $statusCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
   curl_close($handle);
   return $statusCode;
  }
 

 /**
  * preg_replace_callback() handler that rebuilds an <img> tag with an absolute src.
  *
  * Expected capture groups: [2] text between "<img" and "src=", [3] opening quote
  * (may be empty), [4] image URL, [6] remainder of the tag after the URL.
  *
  * @param array $matches Capture groups of the matched <img> tag.
  * @return string The rewritten <img ... /> tag.
  */
 function jiacai365cn($matches){

	$url = trim($matches[4]);

	if (strpos($url, '//') === 0) {
		// Protocol-relative URL: only the scheme is missing.
		$url = 'http:'.$url;
	} elseif (!preg_match('#^https?://#i', $url)) {
		// Site-relative path. Drop leading slashes so that the result has no "//" in
		// the path. URLs that are already absolute (http or https) are left alone;
		// https URLs used to get the site prefix prepended.
		$url = 'http://www.jiancai365.cn/'.ltrim($url, '/');
	}

	$rest = isset($matches[6]) ? $matches[6] : '';

	// Group 6 starts with the quote that closed the original src value; the new src is
	// quoted below, so drop it instead of emitting a stray quote.
	if (isset($matches[3]) && $matches[3] !== '' && strpos($rest, $matches[3]) === 0) {
		$rest = (string)substr($rest, 1);
	}

	// Remove an inline onclick handler together with everything that follows it on that line.
	$rest = preg_replace('/onclick=(.*)/i', '', $rest);

	return '<img '.$matches[2].' src="'.$url.'"  '.$rest.' />';
 }

// Never reached at run time: the top-level script above already ends with an unconditional exit.
// (The original comment here read "store the data in the database", which does not describe this statement.)
exit;
